Back to Article
Group by Time Effects
Download Source

Group by Time Effects

Author

Kendra Wyant

Published

June 8, 2025

Set Up Environment

In [1]:

# handle conflicts
options(conflicts.policy = "depends.ok")
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/fun_ml.R?raw=true")
ℹ SHA-1 hash of file is "32a0bc8ced92c79756b56ddcdc9a06e639795da6"

tidymodels_conflictRules()
In [2]:

suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(tidymodels))
suppressPackageStartupMessages(library(tidyposterior))
library(kableExtra, exclude = "group_rows")
library(Rcpp, exclude = "populate")
library(brms, exclude = c("ar", "mixture"))
Loading 'brms' package (version 2.22.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').

theme_set(theme_classic()) 
In [3]:

devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/format_path.R?raw=true")
ℹ SHA-1 hash of file is "de12d764438078a9341db9bc0b2472c87e0ae846"

# CHTC support functions
devtools::source_url("https://github.com/jjcurtin/lab_support/blob/main/chtc/static_files/fun_chtc.R?raw=true")
ℹ SHA-1 hash of file is "6112ad4934c18197bb71037f5d65b97e1fd2b039"
In [4]:
path_processed <- format_path(str_c("studydata/risk/data_processed/lag"))
path_models_lag <- format_path(str_c("studydata/risk/models/lag"))

Read in Model Performance Metrics

In [5]:
auroc_dem_0 <- read_csv(here::here(path_models_lag, 
                                   "test_auroc_6_x_5_1day_0_v3_nested_strat_lh_fairness.csv"),
                      col_types = cols()) |> 
  mutate(fold_num = rep(1:5, 6),
         repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5), 
                        rep(4, 5), rep(5, 5), rep(6, 5))) |> 
  mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |> 
  select(-outer_split_num)


auroc_dem_24 <- read_csv(here::here(path_models_lag, 
                                    "test_auroc_6_x_5_1day_24_v3_nested_strat_lh_fairness.csv"),
                      col_types = cols())  |> 
  mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |> 
  mutate(fold_num = rep(1:5, 6),
         repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5), 
                        rep(4, 5), rep(5, 5), rep(6, 5))) |> 
  select(-outer_split_num)

auroc_dem_72 <- read_csv(here::here(path_models_lag, 
                                    "test_auroc_6_x_5_1day_72_v3_nested_strat_lh_fairness.csv"),
                      col_types = cols()) |> 
  mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |> 
  mutate(fold_num = rep(1:5, 6),
         repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5), 
                        rep(4, 5), rep(5, 5), rep(6, 5))) |> 
  select(-outer_split_num)

auroc_dem_168 <- read_csv(here::here(path_models_lag, 
                                     "test_auroc_6_x_5_1day_168_v3_nested_strat_lh_fairness.csv"),
                      col_types = cols())  |> 
  mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |> 
  mutate(fold_num = rep(1:5, 6),
         repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5), 
                        rep(4, 5), rep(5, 5), rep(6, 5))) |> 
  select(-outer_split_num)

auroc_dem_336 <- read_csv(here::here(path_models_lag, 
                                     "test_auroc_6_x_5_1day_336_v3_nested_strat_lh_fairness.csv"),
                      col_types = cols())  |> 
  arrange(outer_split_num) |>
  mutate(across(everything(), ~if_else(.x == 0, .0000001, .x))) |> 
  mutate(fold_num = rep(1:5, 6),
         repeat_num = c(rep(1, 5), rep(2, 5), rep(3, 5), 
                        rep(4, 5), rep(5, 5), rep(6, 5))) |> 
  select(-outer_split_num)
In [6]:
auroc_dem_all <- auroc_dem_0 |> 
  mutate(lag = 0) |> 
  bind_rows(auroc_dem_24 |> 
              mutate(lag = 24)) |>
  bind_rows(auroc_dem_72 |> 
              mutate(lag = 72)) |>
  bind_rows(auroc_dem_168 |> 
              mutate(lag = 168)) |>
  bind_rows(auroc_dem_336 |> 
              mutate(lag = 336))

set.seed(101)

Race

In [7]:
data <- auroc_dem_all |> 
  select(id = fold_num, id2 = repeat_num, `not white`, `non-hispanic white` = white, lag) |> 
  pivot_longer(cols = c(`not white`, `non-hispanic white`), names_to = "race", values_to = "auroc") |> 
  mutate(race = factor(race)) |>
  glimpse()
Rows: 300
Columns: 5
$ id    <dbl> 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1…
$ id2   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3…
$ lag   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ race  <fct> not white, non-hispanic white, not white, non-hispanic white, no…
$ auroc <dbl> 0.8624530, 0.9032410, 0.8416644, 0.9328614, 0.9510940, 0.9253825…

Set priors to perf_mod() defaults

In [8]:
priors <- c(
  prior(normal(2, 1.1), class = "Intercept"),
  
  prior(normal(0, 2.79), class = "b"),

  prior(exponential(2.2), class = "sigma")
)
In [9]:
model_race <- brm(
  formula = auroc ~ 1 + race + lag + race*lag + (1 | id2/id), # folds nested in repeats
  data = subset(data, !is.na(auroc)),
  family = gaussian(link = "logit"), # normal distribution w/auroc bounded between 0 and 1
  chains = 4,
  prior = priors,
  control = list(adapt_delta = 0.99), 
  iter = 6000,
  thin = 10,
  seed = 123
)
Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 13.1.6 (clang-1316.0.21.2.5)’
using SDK: ‘MacOSX12.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
         ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 7.7e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.77 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 1: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 1: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 1: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 1: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 15.169 seconds (Warm-up)
Chain 1:                11.724 seconds (Sampling)
Chain 1:                26.893 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2.7e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 2: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 15.811 seconds (Warm-up)
Chain 2:                10.441 seconds (Sampling)
Chain 2:                26.252 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 2.3e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 3: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 3: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 3: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 3: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 3: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 15.195 seconds (Warm-up)
Chain 3:                7.318 seconds (Sampling)
Chain 3:                22.513 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 2.3e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 4: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 4: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 4: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 15.704 seconds (Warm-up)
Chain 4:                9.199 seconds (Sampling)
Chain 4:                24.903 seconds (Total)
Chain 4: 
In [10]:
summary(model_race) 
 Family: gaussian 
  Links: mu = logit; sigma = identity 
Formula: auroc ~ 1 + race + lag + race * lag + (1 | id2/id) 
   Data: subset(data, !is.na(auroc)) (Number of observations: 293) 
  Draws: 4 chains, each with iter = 6000; warmup = 3000; thin = 10;
         total post-warmup draws = 1200

Multilevel Hyperparameters:
~id2 (Number of levels: 6) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.33      0.31     0.01     1.15 1.01      907     1175

~id2:id (Number of levels: 30) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     1.20      0.20     0.88     1.66 1.00     1048     1173

Regression Coefficients:
                 Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept            4.06      0.43     3.26     4.96 1.00     1050     1171
racenotwhite        -2.82      0.33    -3.53    -2.24 1.00     1155     1139
lag                 -0.00      0.00    -0.00     0.00 1.00     1316     1112
racenotwhite:lag     0.00      0.00    -0.00     0.00 1.00     1319     1112

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.10      0.00     0.09     0.11 1.00     1315     1172

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
In [11]:
pp_race <- summary(model_race)$fixed |>
  as_tibble(rownames = "coef") |> 
  select(coef,
         pp_mean = Estimate, 
         pp_lower = `l-95% CI`, 
         pp_upper = `u-95% CI`) 

plot posterior distribution for race effect

In [12]:
as.matrix(model_race, variable = "b_racenotwhite") |> 
  as_tibble() |> 
  ggplot(aes(x = b_racenotwhite)) +
  geom_histogram(fill = "grey", color = "black", bins = 30) +
  geom_segment(mapping = aes(y = 250, yend = 300, x = pp_mean, xend = pp_mean),
               data = subset(pp_race, coef == "racenotwhite")) +
  geom_segment(mapping = aes(y = 275, yend = 275, x = pp_lower, xend = pp_upper),
                data = subset(pp_race, coef == "racenotwhite")) +
  geom_vline(xintercept = 0, linetype =  "dashed") 

plot posterior distribution for interaction effect

In [13]:
as.matrix(model_race, variable = "b_racenotwhite:lag") |> 
  ggplot(aes(x = `b_racenotwhite:lag`)) +
  geom_histogram(fill = "grey", color = "black", bins = 30) +
    geom_segment(mapping = aes(y = 100, yend = 150, x = pp_mean, xend = pp_mean),
               data = subset(pp_race, coef == "racenotwhite:lag")) +
  geom_segment(mapping = aes(y = 125, yend = 125, x = pp_lower, xend = pp_upper),
                data = subset(pp_race, coef == "racenotwhite:lag")) +
  geom_vline(xintercept = 0, linetype =  "dashed") 

Check convergence diagnostics

In [14]:
bayesplot::mcmc_trace(model_race, pars = c("b_Intercept", "b_racenotwhite", "b_lag", "b_racenotwhite:lag"))

bayesplot::mcmc_acf(model_race, pars = c("b_Intercept", "b_racenotwhite", "b_lag", "b_racenotwhite:lag"))

bayesplot::mcmc_dens(model_race, pars = c("b_Intercept", "b_racenotwhite", "b_lag", "b_racenotwhite:lag"))

Check posteriors

In [15]:
pp_check(model_race)
Using 10 posterior draws for ppc type 'dens_overlay' by default.

Sex

In [16]:
data <- auroc_dem_all |> 
  select(id = fold_num, id2 = repeat_num, female, male, lag) |> 
  pivot_longer(cols = c(female, male), names_to = "sex", values_to = "auroc") |>
  mutate(sex = factor(sex, levels = c("male", "female"))) |>
  glimpse()
Rows: 300
Columns: 5
$ id    <dbl> 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1…
$ id2   <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3…
$ lag   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
$ sex   <fct> female, male, female, male, female, male, female, male, female, …
$ auroc <dbl> 0.9245708, 0.8477054, 0.8592244, 0.9544339, 0.9019895, 0.9756489…
model_sex <-brm(
  formula = auroc ~ 1 + sex + lag + sex*lag + (1 | id2/id), # folds nested in repeats
  data = subset(data, !is.na(auroc)),
  family = gaussian(link = "logit"), # normal distribution w/auroc bounded between 0 and 1
  chains = 4,
  prior = priors,
  control = list(adapt_delta = 0.99), 
  iter = 6000,
  thin = 10,
  seed = 123
)
Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 13.1.6 (clang-1316.0.21.2.5)’
using SDK: ‘MacOSX12.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
         ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.000259 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 2.59 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 1: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 1: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 1: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 1: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 26.822 seconds (Warm-up)
Chain 1:                10.119 seconds (Sampling)
Chain 1:                36.941 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2.4e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.24 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 2: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 26.307 seconds (Warm-up)
Chain 2:                9.913 seconds (Sampling)
Chain 2:                36.22 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 2.8e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.28 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 3: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 3: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 3: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 3: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 3: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 25.575 seconds (Warm-up)
Chain 3:                13.654 seconds (Sampling)
Chain 3:                39.229 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 2.3e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.23 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 4: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 4: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 4: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 26.281 seconds (Warm-up)
Chain 4:                10.768 seconds (Sampling)
Chain 4:                37.049 seconds (Total)
Chain 4: 
In [17]:
summary(model_sex)
 Family: gaussian 
  Links: mu = logit; sigma = identity 
Formula: auroc ~ 1 + sex + lag + sex * lag + (1 | id2/id) 
   Data: subset(data, !is.na(auroc)) (Number of observations: 300) 
  Draws: 4 chains, each with iter = 6000; warmup = 3000; thin = 10;
         total post-warmup draws = 1200

Multilevel Hyperparameters:
~id2 (Number of levels: 6) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.08      0.10     0.00     0.30 1.00     1060     1127

~id2:id (Number of levels: 30) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.34      0.05     0.25     0.45 1.00     1085     1309

Regression Coefficients:
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
Intercept         2.42      0.09     2.24     2.60 1.00     1011      986
sexfemale        -0.52      0.06    -0.64    -0.41 1.00     1294     1257
lag              -0.00      0.00    -0.00    -0.00 1.00     1288     1160
sexfemale:lag    -0.00      0.00    -0.00    -0.00 1.00     1295     1160

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.03      0.00     0.03     0.04 1.00     1128     1129

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
In [18]:
pp_sex <- summary(model_sex)$fixed |>
  as_tibble(rownames = "coef") |> 
  select(coef,
         pp_mean = Estimate, 
         pp_lower = `l-95% CI`, 
         pp_upper = `u-95% CI`) 

plot posterior distribution for sex effect

In [19]:
as.matrix(model_sex, variable = "b_sexfemale") |> 
  as_tibble() |> 
  ggplot(aes(x = b_sexfemale)) +
  geom_histogram(fill = "grey", color = "black", bins = 30) +
  geom_segment(mapping = aes(y = 300, yend = 350, x = pp_mean, xend = pp_mean),
               data = subset(pp_sex, coef == "sexfemale")) +
  geom_segment(mapping = aes(y = 325, yend = 325, x = pp_lower, xend = pp_upper),
                data = subset(pp_sex, coef == "sexfemale")) +
  geom_vline(xintercept = 0, linetype =  "dashed") 

plot posterior distribution for interaction effect

In [20]:
as.matrix(model_sex, variable = "b_sexfemale:lag") |> 
  ggplot(aes(x = `b_sexfemale:lag`)) +
  geom_histogram(fill = "grey", color = "black", bins = 30) +
    geom_segment(mapping = aes(y = 100, yend = 150, x = pp_mean, xend = pp_mean),
               data = subset(pp_sex, coef == "sexfemale:lag")) +
  geom_segment(mapping = aes(y = 125, yend = 125, x = pp_lower, xend = pp_upper),
                data = subset(pp_sex, coef == "sexfemale:lag")) +
  geom_vline(xintercept = 0, linetype =  "dashed") 

In [21]:
bayesplot::mcmc_trace(model_sex, pars = c("b_Intercept", "b_sexfemale", "b_lag", "b_sexfemale:lag"))

bayesplot::mcmc_acf(model_sex, pars = c("b_Intercept", "b_sexfemale", "b_lag", "b_sexfemale:lag"))

bayesplot::mcmc_dens(model_sex, pars = c("b_Intercept", "b_sexfemale", "b_lag", "b_sexfemale:lag"))

Check posteriors

In [22]:
pp_check(model_sex)
Using 10 posterior draws for ppc type 'dens_overlay' by default.

Income

In [23]:
data <- auroc_dem_all |> 
  select(id = fold_num, id2 = repeat_num, `above poverty`, `below poverty`, lag) |> 
  pivot_longer(cols = c(`above poverty`, `below poverty`), names_to = "income", 
               values_to = "auroc") |>
  mutate(income = factor(income)) |>
  glimpse()
Rows: 300
Columns: 5
$ id     <dbl> 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 1, …
$ id2    <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, …
$ lag    <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ income <fct> above poverty, below poverty, above poverty, below poverty, abo…
$ auroc  <dbl> 0.8902036, 0.9093397, 0.9092639, 0.9241711, 0.9392897, 0.902794…
model_income <- brm(
  formula = auroc ~ 1 + income + lag + income*lag + (1 | id2/id), # folds nested in repeats
  data = subset(data, !is.na(auroc)),
  family = gaussian(link = "logit"), # normal distribution w/auroc bounded between 0 and 1
  chains = 4,
  prior = priors,
  control = list(adapt_delta = 0.999), 
  iter = 6000,
  thin = 10,
  seed = 123
)
Compiling Stan program...
Trying to compile a simple C file
Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
using C compiler: ‘Apple clang version 13.1.6 (clang-1316.0.21.2.5)’
using SDK: ‘MacOSX12.3.sdk’
clang -arch arm64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/arm64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
In file included from <built-in>:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Dense:1:
In file included from /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/Core:19:
/Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
#include <cmath>
         ^~~~~~~
1 error generated.
make: *** [foo.o] Error 1
Start sampling

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 7.8e-05 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0.78 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 1: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 1: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 1: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 1: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 1: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 1: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 1: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 1: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 1: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 1: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 1: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 32.957 seconds (Warm-up)
Chain 1:                17.609 seconds (Sampling)
Chain 1:                50.566 seconds (Total)
Chain 1: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 2).
Chain 2: 
Chain 2: Gradient evaluation took 2.6e-05 seconds
Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0.26 seconds.
Chain 2: Adjust your expectations accordingly!
Chain 2: 
Chain 2: 
Chain 2: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 2: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 2: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 2: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 2: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 2: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 2: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 2: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 2: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 2: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 2: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 2: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 2: 
Chain 2:  Elapsed Time: 35.192 seconds (Warm-up)
Chain 2:                19.175 seconds (Sampling)
Chain 2:                54.367 seconds (Total)
Chain 2: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 3).
Chain 3: 
Chain 3: Gradient evaluation took 2.6e-05 seconds
Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0.26 seconds.
Chain 3: Adjust your expectations accordingly!
Chain 3: 
Chain 3: 
Chain 3: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 3: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 3: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 3: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 3: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 3: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 3: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 3: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 3: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 3: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 3: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 3: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 3: 
Chain 3:  Elapsed Time: 39.954 seconds (Warm-up)
Chain 3:                37.112 seconds (Sampling)
Chain 3:                77.066 seconds (Total)
Chain 3: 

SAMPLING FOR MODEL 'anon_model' NOW (CHAIN 4).
Chain 4: 
Chain 4: Gradient evaluation took 2.7e-05 seconds
Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0.27 seconds.
Chain 4: Adjust your expectations accordingly!
Chain 4: 
Chain 4: 
Chain 4: Iteration:    1 / 6000 [  0%]  (Warmup)
Chain 4: Iteration:  600 / 6000 [ 10%]  (Warmup)
Chain 4: Iteration: 1200 / 6000 [ 20%]  (Warmup)
Chain 4: Iteration: 1800 / 6000 [ 30%]  (Warmup)
Chain 4: Iteration: 2400 / 6000 [ 40%]  (Warmup)
Chain 4: Iteration: 3000 / 6000 [ 50%]  (Warmup)
Chain 4: Iteration: 3001 / 6000 [ 50%]  (Sampling)
Chain 4: Iteration: 3600 / 6000 [ 60%]  (Sampling)
Chain 4: Iteration: 4200 / 6000 [ 70%]  (Sampling)
Chain 4: Iteration: 4800 / 6000 [ 80%]  (Sampling)
Chain 4: Iteration: 5400 / 6000 [ 90%]  (Sampling)
Chain 4: Iteration: 6000 / 6000 [100%]  (Sampling)
Chain 4: 
Chain 4:  Elapsed Time: 34.666 seconds (Warm-up)
Chain 4:                9.797 seconds (Sampling)
Chain 4:                44.463 seconds (Total)
Chain 4: 
Warning: There were 18 transitions after warmup that exceeded the maximum treedepth. Increase max_treedepth above 10. See
https://mc-stan.org/misc/warnings.html#maximum-treedepth-exceeded
Warning: Examine the pairs() plot to diagnose sampling problems
In [24]:
summary(model_income)
 Family: gaussian 
  Links: mu = logit; sigma = identity 
Formula: auroc ~ 1 + income + lag + income * lag + (1 | id2/id) 
   Data: subset(data, !is.na(auroc)) (Number of observations: 300) 
  Draws: 4 chains, each with iter = 6000; warmup = 3000; thin = 10;
         total post-warmup draws = 1200

Multilevel Hyperparameters:
~id2 (Number of levels: 6) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.12      0.12     0.00     0.47 1.00     1105     1154

~id2:id (Number of levels: 30) 
              Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sd(Intercept)     0.48      0.08     0.35     0.65 1.00     1245     1182

Regression Coefficients:
                       Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
Intercept                  2.27      0.12     2.03     2.52 1.00     1119
incomebelowpoverty        -0.37      0.07    -0.50    -0.24 1.00     1075
lag                       -0.00      0.00    -0.00    -0.00 1.00     1086
incomebelowpoverty:lag    -0.00      0.00    -0.00     0.00 1.00     1220
                       Tail_ESS
Intercept                  1151
incomebelowpoverty         1171
lag                        1131
incomebelowpoverty:lag     1052

Further Distributional Parameters:
      Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
sigma     0.04      0.00     0.04     0.05 1.00     1354     1218

Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
and Tail_ESS are effective sample size measures, and Rhat is the potential
scale reduction factor on split chains (at convergence, Rhat = 1).
In [25]:
pp_income <- summary(model_income)$fixed |>
  as_tibble(rownames = "coef") |> 
  select(coef,
         pp_mean = Estimate, 
         pp_lower = `l-95% CI`, 
         pp_upper = `u-95% CI`) 

plot posterior distribution for income effect

In [26]:
as.matrix(model_income, variable = "b_incomebelowpoverty") |> 
  as_tibble() |> 
  ggplot(aes(x = b_incomebelowpoverty)) +
  geom_histogram(fill = "grey", color = "black", bins = 30) +
  geom_segment(mapping = aes(y = 225, yend = 275, x = pp_mean, xend = pp_mean),
               data = subset(pp_income, coef == "incomebelowpoverty")) +
  geom_segment(mapping = aes(y = 250, yend = 250, x = pp_lower, xend = pp_upper),
                data = subset(pp_income, coef == "incomebelowpoverty")) +
  geom_vline(xintercept = 0, linetype =  "dashed") 

plot posterior distribution for interaction effect

In [27]:
as.matrix(model_income, variable = "b_incomebelowpoverty:lag") |> 
  ggplot(aes(x = `b_incomebelowpoverty:lag`)) +
  geom_histogram(fill = "grey", color = "black", bins = 30) +
  geom_segment(mapping = aes(y = 125, yend = 175, x = pp_mean, xend = pp_mean),
               data = subset(pp_income, coef == "incomebelowpoverty:lag")) +
  geom_segment(mapping = aes(y = 150, yend = 150, x = pp_lower, xend = pp_upper),
                data = subset(pp_income, coef == "incomebelowpoverty:lag")) +
  geom_vline(xintercept = 0, linetype =  "dashed") 

In [28]:
bayesplot::mcmc_trace(model_income, pars = c("b_Intercept", "b_incomebelowpoverty", "b_lag", "b_incomebelowpoverty:lag"))

bayesplot::mcmc_acf(model_income, pars = c("b_Intercept", "b_incomebelowpoverty", "b_lag", "b_incomebelowpoverty:lag"))

bayesplot::mcmc_dens(model_income, pars = c("b_Intercept", "b_incomebelowpoverty", "b_lag", "b_incomebelowpoverty:lag"))

Check posteriors

In [29]:
pp_check(model_income)
Using 10 posterior draws for ppc type 'dens_overlay' by default.